In [ ]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
In [ ]:
import cPickle
from sklearn.linear_model import SGDClassifier
# Out-of-core training: stream the first nine pickled batches through
# partial_fit, rebinding X_batch/y_batch on every iteration.
sgd = SGDClassifier(random_state=0)
for i in range(9):
    # Use a context manager so each batch file is closed deterministically
    # instead of relying on garbage collection of the anonymous handle.
    # NOTE(review): mode is left at the default to match the original call;
    # switch to "rb" if the batches were written with a binary pickle protocol.
    with open("data/batch_%02d.pickle" % i) as batch_file:
        X_batch, y_batch = cPickle.load(batch_file)
    # The full label set is passed explicitly on every call, so a batch that
    # happens to miss some digit does not matter.
    sgd.partial_fit(X_batch, y_batch, classes=range(10))
In [ ]:
# Evaluate on batch 09, which the training loop (batches 00-08) never reads.
# The file is opened in a context manager so the handle is closed right away.
with open("data/batch_09.pickle") as test_file:
    X_test, y_test = cPickle.load(test_file)
# Kept as the last bare expression so the notebook displays the score.
sgd.score(X_test, y_test)
In [ ]:
# Progressive validation: score each incoming batch *before* training on it,
# so every recorded accuracy is measured on data the model has not yet seen.
sgd = SGDClassifier()
accuracies = [0]  # placeholder entry for "no batches seen yet"
for i in range(9):
    # Close each batch file deterministically instead of leaking the handle.
    with open("data/batch_%02d.pickle" % i) as batch_file:
        X_batch, y_batch = cPickle.load(batch_file)
    if i > 0:
        # Skip the very first batch: nothing has been fitted yet, so there is
        # no model to score with.
        accuracies.append(sgd.score(X_batch, y_batch))
    sgd.partial_fit(X_batch, y_batch, classes=range(10))
In [ ]:
# Plot the recorded accuracies against their index in the list, i.e. the
# number of batches the model had been trained on when each was measured.
plt.plot(accuracies)
plt.xlabel("batches seen")
plt.ylabel("generalization performance")
In [ ]:
import pandas as pd
# Same incremental-training pattern, but streaming a CSV file in 100-row
# chunks instead of loading pre-pickled batches.
csv_iterator = pd.read_csv("data/digits.csv", chunksize=100)
sgd = SGDClassifier()
for batch in csv_iterator:
    # assumes the last column holds the label and all others are features
    # -- TODO confirm against the layout of digits.csv
    X = batch[batch.columns[:-1]]
    y = batch[batch.columns[-1]]
    # Fit on the chunk that was just read (X, y), not on the X_batch/y_batch
    # names left over from an earlier cell, which would ignore the CSV data.
    sgd.partial_fit(X, y, classes=range(10))
In [ ]:
from sklearn.utils.testing import all_estimators
# Print every estimator that exposes a partial_fit method, formatted as
# "<second component of its module path>.<estimator name>".
for name, Class in all_estimators():
    if not hasattr(Class, "partial_fit"):
        continue
    subpackage = Class.__module__.split(".")[1]
    print("%s.%s" % (subpackage, name))
In [ ]:
# Train for 20 passes over batches 00-08 and record accuracy on batch 09.
# NOTE(review): this export lost its indentation, so it is not visible here
# whether the score on the last line is taken once per pass (20 values) or
# after every batch (180 values) -- confirm against the original notebook
# before re-indenting.
sgd = SGDClassifier(random_state=0)
# Batch 09 is never used for training in the loops below.
X_test, y_test = cPickle.load(open("data/batch_09.pickle"))
accuracies = []
for iteration in range(20):
for i in range(9):
X_batch, y_batch = cPickle.load(open("data/batch_%02d.pickle" % i))
# The same estimator is updated in place on every call.
sgd.partial_fit(X_batch, y_batch, classes=range(10))
accuracies.append(sgd.score(X_test, y_test))
In [ ]:
# Plot the batch-09 accuracies collected during the repeated training passes.
plt.plot(accuracies)